package com.jida.hadoop.mr.wz.dataclean;

import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Regex helpers for data cleaning: extract the host part of a URL and
 * reduce a host string to an IPv4 address or a domain with a known suffix.
 *
 * <p>All methods are static and return the empty string when nothing matches.
 */
public class TMatcher {
	/** Lazily captures the text between "://" and the next "/". */
	private static final Pattern HOST_PATTERN = Pattern.compile("(?<=://)(.*?)(?=/)");

	/**
	 * Dotted-quad IPv4 address. The first octet is 1-255 (no leading zero);
	 * the remaining three octets are 0-255 so that addresses such as
	 * 192.168.0.1 are accepted.
	 */
	private static final Pattern IPV4_PATTERN = Pattern.compile(
			"([1-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])"
			+ "(\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])){3}");

	/**
	 * One label (word characters and hyphens) followed by one of the
	 * recognised suffixes, anchored at the end of the input. Two-part
	 * suffixes come first in the alternation so they are tried before their
	 * one-part tails, and their dots are escaped so they match a literal ".".
	 */
	private static final Pattern DOMAIN_PATTERN = Pattern.compile(
			"([\\w-]*\\.?\\.(com\\.cn|net\\.cn|gov\\.cn|org\\.nz|org\\.cn"
			+ "|com|net|org|gov|cc|biz|info|cn|hk|in|am|im|fm|tv|co|me|us|io"
			+ "|mobi|pw|so|gs|top|la))$");

	/**
	 * Extracts the text between "://" and the first following "/".
	 *
	 * <p>Note that a "/" after the host is required: for input such as
	 * {@code "http://example.com"} (no path) the result is the empty string.
	 *
	 * @param managers the URL-like string to inspect; may be {@code null}
	 * @return the captured text, or {@code ""} if the input is {@code null}
	 *         or does not contain the "://…/" shape
	 */
	public static String getMatcherStr(String managers){
		if (managers == null) {
			return "";
		}
		Matcher matcher = HOST_PATTERN.matcher(managers);
		// Only the first occurrence is of interest.
		return matcher.find() ? matcher.group(1) : "";
	}

	/**
	 * Normalises a host string to either an IPv4 address or a domain.
	 *
	 * <p>If the whole input is a dotted-quad IPv4 address it is returned
	 * unchanged. Otherwise the trailing "label.suffix" portion is returned
	 * when the input ends with one of the recognised suffixes, e.g.
	 * {@code "www.baidu.com"} yields {@code "baidu.com"}.
	 *
	 * @param domain the host string to inspect; may be {@code null}
	 * @return the IPv4 address, the matched domain, or {@code ""} if the
	 *         input is {@code null} or matches neither form
	 */
	public static String getDomain(String domain){
		if (domain == null) {
			return "";
		}
		// IP check: the entire string must be an IPv4 address.
		if (IPV4_PATTERN.matcher(domain).matches()) {
			return domain;
		}
		// Domain check: take the label + suffix anchored at the end of the input.
		Matcher matcher = DOMAIN_PATTERN.matcher(domain);
		return matcher.find() ? matcher.group() : "";
	}
}